@inproceedings{tan-bansal-2019-lxmert,
    title = "{LXMERT}: Learning Cross-Modality Encoder Representations from Transformers",
    author = "Tan, Hao  and
      Bansal, Mohit",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)",
    month = nov,
    year = "2019",
    address = "Hong Kong, China",
    publisher = "Association for Computational Linguistics",
    url = "https://www.aclweb.org/anthology/D19-1514",
    doi = "10.18653/v1/D19-1514",
    pages = "5103--5114",
}